%autosave 300
Autosaving every 300 seconds
#Print all the outputs in a cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import numpy as np
import seaborn as sns
%pylab inline
import sklearn as sk
import sklearn.tree
from IPython.display import Image
import pydotplus
import plotly.express as px
import sklearn.tree as tree
#Suppress Pandas future warning
import warnings
warnings.filterwarnings('ignore')
Populating the interactive namespace from numpy and matplotlib
pd.set_option('display.max_columns',None)
df = pd.read_csv('Telco_Data_V3.csv')
df.head()
| LoyaltyID | Customer ID | Senior Citizen | Partner | Dependents | Tenure | Phone Service | Multiple Lines | Internet Service | Online Security | Online Backup | Device Protection | Tech Support | Streaming TV | Streaming Movies | Contract | Paperless Billing | Payment Method | Monthly Charges | Total Charges | Gender | Age | Under 30 | Senior Citizen.1 | Married | Dependents.1 | Number of Dependents | City | Zip Code | Latitude | Longitude | Population | Total Revenue | Satisfaction Score | Customer Status | Churn Value | Churn Score | Churn Category | Churn Reason | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 318537 | 7590-VHVEG | No | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | Female | 36 | No | No | Yes | No | 0 | Los Angeles | 90001 | 33.973616 | -118.249020 | 54492 | 29.85 | 3 | Joined | 0 | 27 | 0 | 0 |
| 1 | 152148 | 5575-GNVDE | No | No | No | 34 | Yes | No | DSL | Yes | No | Yes | No | No | No | One year | No | Mailed check | 56.95 | 1889.5 | Male | 46 | No | No | No | No | 0 | Los Angeles | 90002 | 33.949255 | -118.246978 | 44586 | 2470.56 | 3 | Stayed | 0 | 61 | 0 | 0 |
| 2 | 326527 | 3668-QPYBK | No | No | No | 2 | Yes | No | DSL | Yes | Yes | No | No | No | No | Month-to-month | Yes | Mailed check | 53.85 | 108.15 | Male | 37 | No | No | No | No | 0 | Los Angeles | 90003 | 33.964131 | -118.272783 | 58198 | 129.09 | 1 | Churned | 1 | 86 | Competitor | Competitor made better offer |
| 3 | 845894 | 7795-CFOCW | No | No | No | 45 | No | No phone service | DSL | Yes | No | Yes | Yes | No | No | One year | No | Bank transfer (automatic) | 42.30 | 1840.75 | Male | 53 | No | No | No | No | 0 | Los Angeles | 90004 | 34.076259 | -118.310715 | 67852 | 1840.75 | 3 | Stayed | 0 | 66 | 0 | 0 |
| 4 | 503388 | 9237-HQITU | No | No | No | 2 | Yes | No | Fiber optic | No | No | No | No | No | No | Month-to-month | Yes | Electronic check | 70.70 | 151.65 | Female | 19 | Yes | No | No | Yes | 2 | Los Angeles | 90005 | 34.059281 | -118.307420 | 43019 | 169.89 | 2 | Churned | 1 | 67 | Other | Moved |
df.corr()
| LoyaltyID | Tenure | Monthly Charges | Age | Number of Dependents | Zip Code | Latitude | Longitude | Population | Total Revenue | Satisfaction Score | Churn Value | Churn Score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| LoyaltyID | 1.000000 | -0.003633 | -0.006984 | -0.018213 | 0.005464 | -0.022660 | -0.030903 | 0.027215 | 0.020175 | -0.007301 | -0.000186 | 0.009513 | 0.013237 |
| Tenure | -0.003633 | 1.000000 | 0.247900 | 0.010202 | 0.107689 | 0.007362 | 0.012075 | -0.009716 | -0.012388 | 0.853078 | 0.210056 | -0.352229 | -0.224523 |
| Monthly Charges | -0.006984 | 0.247900 | 1.000000 | 0.144685 | -0.131837 | -0.010775 | -0.030325 | 0.028429 | 0.009597 | 0.588887 | -0.223922 | 0.193356 | 0.132115 |
| Age | -0.018213 | 0.010202 | 0.144685 | 1.000000 | -0.119000 | -0.008183 | -0.010305 | 0.007612 | -0.018881 | 0.048265 | -0.085170 | 0.115760 | 0.083919 |
| Number of Dependents | 0.005464 | 0.107689 | -0.131837 | -0.119000 | 1.000000 | 0.016493 | 0.029081 | -0.024271 | -0.015486 | 0.038038 | 0.152873 | -0.218780 | -0.160743 |
| Zip Code | -0.022660 | 0.007362 | -0.010775 | -0.008183 | 0.016493 | 1.000000 | 0.894769 | -0.790564 | -0.424067 | 0.002944 | 0.002121 | -0.016289 | -0.018020 |
| Latitude | -0.030903 | 0.012075 | -0.030325 | -0.010305 | 0.029081 | 0.894769 | 1.000000 | -0.885979 | -0.434801 | 0.000082 | 0.022367 | -0.041546 | -0.037615 |
| Longitude | 0.027215 | -0.009716 | 0.028429 | 0.007612 | -0.024271 | -0.790564 | -0.885979 | 1.000000 | 0.296288 | 0.001062 | -0.009048 | 0.024052 | 0.019394 |
| Population | 0.020175 | -0.012388 | 0.009597 | -0.018881 | -0.015486 | -0.424067 | -0.434801 | 0.296288 | 1.000000 | -0.010546 | -0.031056 | 0.051649 | 0.020012 |
| Total Revenue | -0.007301 | 0.853078 | 0.588887 | 0.048265 | 0.038038 | 0.002944 | 0.000082 | 0.001062 | -0.010546 | 1.000000 | 0.094796 | -0.223003 | -0.138072 |
| Satisfaction Score | -0.000186 | 0.210056 | -0.223922 | -0.085170 | 0.152873 | 0.002121 | 0.022367 | -0.009048 | -0.031056 | 0.094796 | 1.000000 | -0.754649 | -0.495144 |
| Churn Value | 0.009513 | -0.352229 | 0.193356 | 0.115760 | -0.218780 | -0.016289 | -0.041546 | 0.024052 | 0.051649 | -0.223003 | -0.754649 | 1.000000 | 0.660772 |
| Churn Score | 0.013237 | -0.224523 | 0.132115 | 0.083919 | -0.160743 | -0.018020 | -0.037615 | 0.019394 | 0.020012 | -0.138072 | -0.495144 | 0.660772 | 1.000000 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7043 entries, 0 to 7042 Data columns (total 39 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 LoyaltyID 7043 non-null int64 1 Customer ID 7043 non-null object 2 Senior Citizen 7043 non-null object 3 Partner 7043 non-null object 4 Dependents 7043 non-null object 5 Tenure 7043 non-null int64 6 Phone Service 7043 non-null object 7 Multiple Lines 7043 non-null object 8 Internet Service 7043 non-null object 9 Online Security 7043 non-null object 10 Online Backup 7043 non-null object 11 Device Protection 7043 non-null object 12 Tech Support 7043 non-null object 13 Streaming TV 7043 non-null object 14 Streaming Movies 7043 non-null object 15 Contract 7043 non-null object 16 Paperless Billing 7043 non-null object 17 Payment Method 7043 non-null object 18 Monthly Charges 7043 non-null float64 19 Total Charges 7043 non-null object 20 Gender 7043 non-null object 21 Age 7043 non-null int64 22 Under 30 7043 non-null object 23 Senior Citizen.1 7043 non-null object 24 Married 7043 non-null object 25 Dependents.1 7043 non-null object 26 Number of Dependents 7043 non-null int64 27 City 7043 non-null object 28 Zip Code 7043 non-null int64 29 Latitude 7043 non-null float64 30 Longitude 7043 non-null float64 31 Population 7043 non-null int64 32 Total Revenue 7043 non-null float64 33 Satisfaction Score 7043 non-null int64 34 Customer Status 7043 non-null object 35 Churn Value 7043 non-null int64 36 Churn Score 7043 non-null int64 37 Churn Category 7043 non-null object 38 Churn Reason 7043 non-null object dtypes: float64(4), int64(9), object(26) memory usage: 2.1+ MB
df1 = df.copy()
#Replacing strings with numbers 1 or 0
df1.replace({'Yes':1,'No':0,'No phone service':0,'No internet service':0},inplace = True)
#Drop duplicate columns or columns that are not useful for our analysis
df1.drop(columns = ['LoyaltyID','Customer ID','Senior Citizen.1','Married','Dependents.1','Customer Status'],inplace = True)
df1.head(1)
| Senior Citizen | Partner | Dependents | Tenure | Phone Service | Multiple Lines | Internet Service | Online Security | Online Backup | Device Protection | Tech Support | Streaming TV | Streaming Movies | Contract | Paperless Billing | Payment Method | Monthly Charges | Total Charges | Gender | Age | Under 30 | Number of Dependents | City | Zip Code | Latitude | Longitude | Population | Total Revenue | Satisfaction Score | Churn Value | Churn Score | Churn Category | Churn Reason | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | DSL | 0 | 1 | 0 | 0 | 0 | 0 | Month-to-month | 1 | Electronic check | 29.85 | 29.85 | Female | 36 | 0 | 0 | Los Angeles | 90001 | 33.973616 | -118.24902 | 54492 | 29.85 | 3 | 0 | 27 | 0 | 0 |
df1.isna().sum()
Senior Citizen 0 Partner 0 Dependents 0 Tenure 0 Phone Service 0 Multiple Lines 0 Internet Service 0 Online Security 0 Online Backup 0 Device Protection 0 Tech Support 0 Streaming TV 0 Streaming Movies 0 Contract 0 Paperless Billing 0 Payment Method 0 Monthly Charges 0 Total Charges 0 Gender 0 Age 0 Under 30 0 Number of Dependents 0 City 0 Zip Code 0 Latitude 0 Longitude 0 Population 0 Total Revenue 0 Satisfaction Score 0 Churn Value 0 Churn Score 0 Churn Category 0 Churn Reason 0 dtype: int64
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7043 entries, 0 to 7042 Data columns (total 33 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Senior Citizen 7043 non-null int64 1 Partner 7043 non-null int64 2 Dependents 7043 non-null int64 3 Tenure 7043 non-null int64 4 Phone Service 7043 non-null int64 5 Multiple Lines 7043 non-null int64 6 Internet Service 7043 non-null object 7 Online Security 7043 non-null int64 8 Online Backup 7043 non-null int64 9 Device Protection 7043 non-null int64 10 Tech Support 7043 non-null int64 11 Streaming TV 7043 non-null int64 12 Streaming Movies 7043 non-null int64 13 Contract 7043 non-null object 14 Paperless Billing 7043 non-null int64 15 Payment Method 7043 non-null object 16 Monthly Charges 7043 non-null float64 17 Total Charges 7043 non-null object 18 Gender 7043 non-null object 19 Age 7043 non-null int64 20 Under 30 7043 non-null int64 21 Number of Dependents 7043 non-null int64 22 City 7043 non-null object 23 Zip Code 7043 non-null int64 24 Latitude 7043 non-null float64 25 Longitude 7043 non-null float64 26 Population 7043 non-null int64 27 Total Revenue 7043 non-null float64 28 Satisfaction Score 7043 non-null int64 29 Churn Value 7043 non-null int64 30 Churn Score 7043 non-null int64 31 Churn Category 7043 non-null object 32 Churn Reason 7043 non-null object dtypes: float64(4), int64(21), object(8) memory usage: 1.8+ MB
#Convert object type to numeric value
df1['Total Charges'] = pd.to_numeric(df1['Total Charges'], errors='coerce')
#Calculate the churn rate based on each zip code
df_churn_rate = pd.DataFrame(df1.groupby('Zip Code')['Churn Value'].mean()).\
rename(columns = {'Churn Value':'churn_rate'})
#Df_churn_rate.head(1)
#Merging in zip code so we can view churn rate by customer by zip code
df1 = df1.merge(df_churn_rate, left_on = 'Zip Code', right_on= 'Zip Code',how='left')
#df1.head(1)
df1 = pd.get_dummies(df1, columns=['Internet Service','Contract'],dummy_na=False)
df2 = df1.copy()
df2 = pd.get_dummies(df2, columns=['Payment Method','Gender','Churn Reason'])
df_churn = df2[df2['Churn Value'] == 1]
#df_churn.head(1)
df3 = df1.copy()
df3 = pd.get_dummies(df3, columns=['City','Payment Method','Gender'])
df1.head(1)
| Senior Citizen | Partner | Dependents | Tenure | Phone Service | Multiple Lines | Online Security | Online Backup | Device Protection | Tech Support | Streaming TV | Streaming Movies | Paperless Billing | Payment Method | Monthly Charges | Total Charges | Gender | Age | Under 30 | Number of Dependents | City | Zip Code | Latitude | Longitude | Population | Total Revenue | Satisfaction Score | Churn Value | Churn Score | Churn Category | Churn Reason | churn_rate | Internet Service_0 | Internet Service_DSL | Internet Service_Fiber optic | Contract_Month-to-month | Contract_One year | Contract_Two year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | Electronic check | 29.85 | 29.85 | Female | 36 | 0 | 0 | Los Angeles | 90001 | 33.973616 | -118.24902 | 54492 | 29.85 | 3 | 0 | 27 | 0 | 0 | 0.25 | 0 | 1 | 0 | 1 | 0 | 0 |
#Dropping N/As from our dataset given small value
df3.isna().any().sum()
1
df3.dropna(inplace=True)
len(df3)
7032
https://community.ibm.com/accelerators/?context=analytics&query=telco%20customer&type=Data
This dataset was sourced from IBM and contains customer information from a Telecommunications company (Telco) that provides a variety of phone and internet services in California. Each row represents a customer and the columns provide different attributes of each customer such as demographic information, the type of services they are enrolled in, and whether or not they churned (i.e. left the company).
**Variables of Consideration**
dt = tree.DecisionTreeClassifier(max_depth=2)
#Removing all highly correlated columns to 'Churn Value'
X = df3.drop(columns=['Churn Value','Satisfaction Score','Churn Reason',\
'Churn Category','Churn Score','churn_rate'])
Y = df3['Churn Value']
dt.fit(X,Y)
DecisionTreeClassifier(max_depth=2)
dt_feature_names = list(X.columns)
dt_target_names = [str(s) for s in Y.unique()]
tree.export_graphviz(dt, out_file='tree.dot',
feature_names=dt_feature_names, class_names=dt_target_names,
filled=True)
graph = pydotplus.graph_from_dot_file('tree.dot')
Image(graph.create_png())
#Churn by Internet Service Type which indicates Fiber Optic has definitively higher churn than DSL.
sns.countplot(x='Churn Value',hue='Internet Service',data=df)
<AxesSubplot:xlabel='Churn Value', ylabel='count'>
#Churn by Contract Type which indicates Month-to-Month contracts have a definitively higher churn rate than longer term contracts.
sns.countplot(x='Churn Value',hue='Contract',data=df)
<AxesSubplot:xlabel='Churn Value', ylabel='count'>
df.head(1)
| LoyaltyID | Customer ID | Senior Citizen | Partner | Dependents | Tenure | Phone Service | Multiple Lines | Internet Service | Online Security | Online Backup | Device Protection | Tech Support | Streaming TV | Streaming Movies | Contract | Paperless Billing | Payment Method | Monthly Charges | Total Charges | Gender | Age | Under 30 | Senior Citizen.1 | Married | Dependents.1 | Number of Dependents | City | Zip Code | Latitude | Longitude | Population | Total Revenue | Satisfaction Score | Customer Status | Churn Value | Churn Score | Churn Category | Churn Reason | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 318537 | 7590-VHVEG | No | Yes | No | 1 | No | No phone service | DSL | No | Yes | No | No | No | No | Month-to-month | Yes | Electronic check | 29.85 | 29.85 | Female | 36 | No | No | Yes | No | 0 | Los Angeles | 90001 | 33.973616 | -118.24902 | 54492 | 29.85 | 3 | Joined | 0 | 27 | 0 | 0 |
df4=df2.copy()
#Group by Churn Value, Month-to-Month contracts, Monthly Charges, and Satisfaction Score which confirm the results
#of our original decision tree.
df4.groupby(['Churn Value','Contract_Month-to-month']).agg({'Internet Service_Fiber optic':'mean',\
'Monthly Charges':'mean','Satisfaction Score':'mean'}).style.background_gradient(cmap = 'Blues')
| Internet Service_Fiber optic | Monthly Charges | Satisfaction Score | ||
|---|---|---|---|---|
| Churn Value | Contract_Month-to-month | |||
| 0 | 0 | 0.281991 | 61.116689 | 3.746445 |
| 1 | 0.435135 | 61.462635 | 3.847748 | |
| 1 | 0 | 0.630841 | 85.438084 | 1.906542 |
| 1 | 0.702115 | 73.019396 | 1.714199 |
#Groupby of Churn Value which shows of those customers who churned, a greater proportion of them
#were Fiber Optic Customers, had month-to-month contracts, and a lower satisfaction score.
df4.groupby('Churn Value').agg({'Contract_Month-to-month':'mean','Internet Service_Fiber optic':'mean','Monthly Charges':'mean',\
'Satisfaction Score':'mean'})
| Contract_Month-to-month | Internet Service_Fiber optic | Monthly Charges | Satisfaction Score | |
|---|---|---|---|---|
| Churn Value | ||||
| 0 | 0.429068 | 0.347700 | 61.265124 | 3.789911 |
| 1 | 0.885500 | 0.693954 | 74.441332 | 1.736223 |
#The average satisfaction score is consistently lower for customers who have monthly charges above $94/month.
#Almost the entirety of customers who pay more than $94/month subscribe to Fiber Optic.
df4['Mcharge_bin'] = pd.cut(df4['Monthly Charges'], bins=[0,93.68,119])
df4.groupby(['Churn Value','Contract_Month-to-month','Mcharge_bin']).agg({'Internet Service_Fiber optic':'mean',\
'Monthly Charges':'mean','Satisfaction Score':'mean'}).style.background_gradient(cmap = 'Blues')
| Internet Service_Fiber optic | Monthly Charges | Satisfaction Score | |||
|---|---|---|---|---|---|
| Churn Value | Contract_Month-to-month | Mcharge_bin | |||
| 0 | 0 | (0.0, 93.68] | 0.076253 | 48.461917 | 3.765577 |
| (93.68, 119.0] | 0.998483 | 105.187557 | 3.679818 | ||
| 1 | (0.0, 93.68] | 0.340000 | 54.865868 | 3.859474 | |
| (93.68, 119.0] | 1.000000 | 100.630937 | 3.778125 | ||
| 1 | 0 | (0.0, 93.68] | 0.168421 | 61.501053 | 2.010526 |
| (93.68, 119.0] | 1.000000 | 104.547479 | 1.823529 | ||
| 1 | (0.0, 93.68] | 0.614241 | 65.150469 | 1.741002 | |
| (93.68, 119.0] | 1.000000 | 99.694430 | 1.623342 |
#Group Churn Reasons with highest on top
df1[df1['Churn Reason']!='0'].groupby('Churn Reason').agg({'Churn Value':'size'}).\
nlargest(20, 'Churn Value').style.background_gradient(cmap = 'Blues')
| Churn Value | |
|---|---|
| Churn Reason | |
| Competitor had better devices | 313 |
| Competitor made better offer | 311 |
| Attitude of support person | 220 |
| Don't know | 130 |
| Competitor offered more data | 117 |
| Competitor offered higher download speeds | 100 |
| Attitude of service provider | 94 |
| Price too high | 78 |
| Product dissatisfaction | 77 |
| Network reliability | 72 |
| Long distance charges | 64 |
| Service dissatisfaction | 63 |
| Moved | 46 |
| Extra data charges | 39 |
| Limited range of services | 37 |
| Poor expertise of online support | 31 |
| Lack of affordable download/upload speed | 30 |
| Lack of self-service on Website | 29 |
| Poor expertise of phone support | 12 |
| Deceased | 6 |
#Drop the columns that have similar information as other columns; location information (since we can eyeball location from the map);
#and columns that are highly correlated with churn rate.
X= df_churn.drop(columns = ['Paperless Billing','Total Charges','Under 30','Latitude','Longitude','City','Zip Code',\
'Satisfaction Score','Churn Value','Churn Score','Churn Category','churn_rate'])
Y = df_churn['churn_rate']
dt = sklearn.tree.DecisionTreeRegressor(max_depth = 2)
dt.fit(X,Y)
dt_feature_names = list(X.columns)
dt_target_names = np.array(Y.unique(),dtype=np.string_)
tree.export_graphviz(dt, out_file='tree.dot',
feature_names=dt_feature_names, class_names=dt_target_names,
filled=True)
graph = pydotplus.graph_from_dot_file('tree.dot')
Image(graph.create_png())
DecisionTreeRegressor(max_depth=2)
#Plot churn rates by population size bin to confirm Decision Tree results.
df_churn['Population_bin'] = pd.cut(df_churn['Population'],bins=[0,27249,110000])
df_churn.groupby('Population_bin')['churn_rate'].mean().plot(kind='bar')
<AxesSubplot:xlabel='Population_bin'>
#Groupby to get zip codes with highest churn rates
ChurnBy_zip1 = df1.groupby(['Zip Code']).agg({'Churn Value':'size','Latitude':'mean',\
'Longitude':'mean','churn_rate':'mean',})
ChurnBy_zip2 = df1.groupby(['Zip Code']).agg({'Churn Value':'size','Latitude':'mean',\
'Longitude':'mean','churn_rate':'mean',}).nlargest(40, 'churn_rate')
#Map by highest churn quantity by Zip Code, color by churn rate and size by quantity of customers that churned
import plotly.express as px
px.set_mapbox_access_token ='pk.eyJ1IjoiYm1hcmluMiIsImEiOiJja3dicGplOTIydm54Mm9tbnQ2cWFwejV6In0.4VaP0Cmn09tQ07XOlwI_ZQ'
fig = px.scatter_mapbox(data_frame=ChurnBy_zip1,
lat="Latitude",
lon="Longitude",
color="churn_rate",
size = "Churn Value",
color_continuous_scale='rdpu')
fig.update_layout(mapbox_style="carto-positron",\
mapbox_zoom=4.4, mapbox_center = {"lat": 37.5, "lon": -117})
#Map with top churn ratios by Zip Code, color by churn rate and size by count of customers
px.set_mapbox_access_token ='pk.eyJ1IjoiYm1hcmluMiIsImEiOiJja3dicGplOTIydm54Mm9tbnQ2cWFwejV6In0.4VaP0Cmn09tQ07XOlwI_ZQ'
fig = px.scatter_mapbox(data_frame=ChurnBy_zip2,
lat="Latitude",
lon="Longitude",
color="churn_rate",
size = "Churn Value",
color_continuous_scale='turbo')
fig.update_layout(mapbox_style="carto-positron",\
mapbox_zoom=4.4, mapbox_center = {"lat": 37.5, "lon": -117})
df1['geo_bin'] = pd.cut(df1.Latitude, bins=[30,33,35,37,42])
sns.catplot(hue='Churn Reason',y='geo_bin', data=df1[df1['Churn Category']!='0'],\
palette="Set2", kind='count',aspect=2)
<seaborn.axisgrid.FacetGrid at 0x7fe9ccc2f040>
pd.set_option('display.max_rows', 80)
df1[df1['Churn Reason']!='0'].groupby(['geo_bin','Churn Reason']).agg({'Churn Value':'size',\
'Internet Service_Fiber optic':'mean','Senior Citizen':'mean',\
'Contract_Month-to-month':'mean', 'Tenure':'mean'}).sort_values(ascending=False,\
by=['geo_bin','Churn Value']).rename(columns={'Customer ID':'# Churned',\
'Internet Service_Fiber optic':'Fiber %',\
'Senior Citizen':'Senior %','Contract_Month-to-month':'Contract_M2M %'})
| Churn Value | Fiber % | Senior % | Contract_M2M % | Tenure | ||
|---|---|---|---|---|---|---|
| geo_bin | Churn Reason | |||||
| (37, 42] | Competitor had better devices | 151 | 0.768212 | 0.304636 | 0.933775 | 14.841060 |
| Competitor offered more data | 73 | 0.794521 | 0.164384 | 0.835616 | 25.095890 | |
| Attitude of support person | 69 | 0.681159 | 0.463768 | 0.942029 | 18.507246 | |
| Competitor made better offer | 67 | 0.731343 | 0.328358 | 0.925373 | 14.298507 | |
| Competitor offered higher download speeds | 57 | 0.719298 | 0.087719 | 0.842105 | 20.614035 | |
| Don't know | 52 | 0.750000 | 0.288462 | 0.884615 | 19.153846 | |
| Price too high | 43 | 0.651163 | 0.279070 | 0.813953 | 22.279070 | |
| Network reliability | 39 | 0.564103 | 0.256410 | 0.871795 | 17.282051 | |
| Product dissatisfaction | 34 | 0.705882 | 0.352941 | 0.764706 | 23.911765 | |
| Service dissatisfaction | 34 | 0.735294 | 0.176471 | 0.941176 | 20.735294 | |
| Long distance charges | 33 | 0.393939 | 0.060606 | 0.818182 | 13.060606 | |
| Attitude of service provider | 30 | 0.633333 | 0.266667 | 0.866667 | 20.333333 | |
| Moved | 22 | 0.681818 | 0.090909 | 0.818182 | 16.772727 | |
| Limited range of services | 19 | 0.736842 | 0.263158 | 0.894737 | 13.789474 | |
| Extra data charges | 18 | 0.722222 | 0.277778 | 1.000000 | 21.722222 | |
| Poor expertise of online support | 17 | 0.705882 | 0.294118 | 0.823529 | 15.411765 | |
| Lack of affordable download/upload speed | 14 | 0.785714 | 0.142857 | 0.857143 | 21.285714 | |
| Lack of self-service on Website | 13 | 0.692308 | 0.000000 | 0.923077 | 14.000000 | |
| Poor expertise of phone support | 7 | 0.571429 | 0.000000 | 0.714286 | 15.571429 | |
| Deceased | 3 | 0.666667 | 0.333333 | 1.000000 | 9.333333 | |
| (35, 37] | Competitor had better devices | 35 | 0.742857 | 0.200000 | 0.942857 | 12.771429 |
| Attitude of support person | 28 | 0.607143 | 0.250000 | 0.892857 | 14.642857 | |
| Attitude of service provider | 25 | 0.720000 | 0.160000 | 0.760000 | 19.520000 | |
| Competitor made better offer | 12 | 0.666667 | 0.500000 | 0.833333 | 11.833333 | |
| Don't know | 11 | 0.818182 | 0.090909 | 1.000000 | 14.636364 | |
| Competitor offered higher download speeds | 9 | 0.666667 | 0.222222 | 0.777778 | 17.222222 | |
| Product dissatisfaction | 7 | 0.571429 | 0.428571 | 0.857143 | 25.142857 | |
| Competitor offered more data | 6 | 1.000000 | 0.166667 | 0.833333 | 14.666667 | |
| Long distance charges | 6 | 0.333333 | 0.000000 | 0.833333 | 19.000000 | |
| Moved | 5 | 1.000000 | 0.400000 | 0.600000 | 36.200000 | |
| Price too high | 5 | 0.800000 | 0.800000 | 1.000000 | 21.000000 | |
| Service dissatisfaction | 4 | 1.000000 | 0.250000 | 0.750000 | 22.250000 | |
| Extra data charges | 3 | 1.000000 | 0.333333 | 1.000000 | 12.000000 | |
| Lack of affordable download/upload speed | 3 | 0.666667 | 0.333333 | 0.666667 | 19.000000 | |
| Lack of self-service on Website | 3 | 0.333333 | 0.000000 | 1.000000 | 24.333333 | |
| Network reliability | 3 | 0.666667 | 0.000000 | 1.000000 | 9.666667 | |
| Poor expertise of online support | 3 | 1.000000 | 0.666667 | 1.000000 | 15.000000 | |
| Poor expertise of phone support | 2 | 0.500000 | 0.500000 | 1.000000 | 8.500000 | |
| Limited range of services | 1 | 0.000000 | 0.000000 | 1.000000 | 13.000000 | |
| Deceased | 0 | NaN | NaN | NaN | NaN | |
| (33, 35] | Attitude of support person | 113 | 0.628319 | 0.194690 | 0.929204 | 16.681416 |
| Competitor had better devices | 113 | 0.743363 | 0.230088 | 0.893805 | 18.309735 | |
| Competitor made better offer | 84 | 0.726190 | 0.297619 | 0.892857 | 17.011905 | |
| Don't know | 50 | 0.520000 | 0.140000 | 0.900000 | 16.000000 | |
| Attitude of service provider | 35 | 0.771429 | 0.314286 | 0.914286 | 12.000000 | |
| Competitor offered more data | 34 | 0.764706 | 0.352941 | 0.852941 | 20.882353 | |
| Product dissatisfaction | 31 | 0.709677 | 0.225806 | 0.838710 | 21.935484 | |
| Competitor offered higher download speeds | 30 | 0.566667 | 0.166667 | 0.733333 | 22.666667 | |
| Network reliability | 29 | 0.793103 | 0.275862 | 0.896552 | 16.241379 | |
| Price too high | 29 | 0.724138 | 0.344828 | 0.965517 | 18.482759 | |
| Service dissatisfaction | 25 | 0.600000 | 0.320000 | 0.960000 | 14.200000 | |
| Long distance charges | 24 | 0.500000 | 0.333333 | 0.833333 | 18.791667 | |
| Moved | 19 | 0.736842 | 0.210526 | 0.842105 | 17.157895 | |
| Extra data charges | 18 | 0.666667 | 0.333333 | 0.833333 | 26.277778 | |
| Limited range of services | 17 | 0.647059 | 0.588235 | 0.941176 | 11.117647 | |
| Lack of affordable download/upload speed | 13 | 0.538462 | 0.384615 | 0.769231 | 19.769231 | |
| Lack of self-service on Website | 11 | 0.454545 | 0.000000 | 1.000000 | 13.818182 | |
| Poor expertise of online support | 8 | 0.500000 | 0.125000 | 1.000000 | 17.375000 | |
| Deceased | 3 | 0.666667 | 0.333333 | 1.000000 | 8.333333 | |
| Poor expertise of phone support | 3 | 0.666667 | 0.000000 | 0.666667 | 13.333333 | |
| (30, 33] | Competitor made better offer | 148 | 0.736486 | 0.250000 | 0.898649 | 17.500000 |
| Don't know | 17 | 0.647059 | 0.176471 | 0.823529 | 25.647059 | |
| Competitor had better devices | 14 | 0.642857 | 0.357143 | 0.928571 | 21.500000 | |
| Attitude of support person | 10 | 0.700000 | 0.300000 | 1.000000 | 23.700000 | |
| Product dissatisfaction | 5 | 0.800000 | 0.200000 | 0.800000 | 32.800000 | |
| Attitude of service provider | 4 | 0.750000 | 0.500000 | 0.500000 | 29.250000 | |
| Competitor offered higher download speeds | 4 | 0.500000 | 0.000000 | 1.000000 | 7.750000 | |
| Competitor offered more data | 4 | 0.750000 | 0.500000 | 1.000000 | 19.750000 | |
| Poor expertise of online support | 3 | 1.000000 | 0.666667 | 0.666667 | 30.000000 | |
| Lack of self-service on Website | 2 | 0.500000 | 0.000000 | 1.000000 | 3.500000 | |
| Long distance charges | 1 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | |
| Network reliability | 1 | 0.000000 | 0.000000 | 0.000000 | 54.000000 | |
| Price too high | 1 | 1.000000 | 0.000000 | 1.000000 | 12.000000 | |
| Deceased | 0 | NaN | NaN | NaN | NaN | |
| Extra data charges | 0 | NaN | NaN | NaN | NaN | |
| Lack of affordable download/upload speed | 0 | NaN | NaN | NaN | NaN | |
| Limited range of services | 0 | NaN | NaN | NaN | NaN | |
| Moved | 0 | NaN | NaN | NaN | NaN | |
| Poor expertise of phone support | 0 | NaN | NaN | NaN | NaN | |
| Service dissatisfaction | 0 | NaN | NaN | NaN | NaN |
pd.set_option('display.max_rows', 80)
Churn_Reason = df1[df1['Churn Reason']!='0'].groupby(['geo_bin','Churn Reason']).agg({'Churn Value':'size'})\
.sort_values(ascending=False,\
by=['geo_bin','Churn Value']).rename(columns={'Churn Value':'Churn Reason Count'})
Churn_Reason_Percent = Churn_Reason.groupby(level=0).apply(lambda x:
(x*100) / x.sum()).rename(columns={'Churn Reason Count':'Churn Reason %'})
Churn_Reason_Percent.groupby('geo_bin',as_index=False).apply(lambda x : x.sort_values(by = 'Churn Reason %', ascending = False).head(5))\
.style.background_gradient(cmap = 'Blues')
| Churn Reason % | |||
|---|---|---|---|
| geo_bin | Churn Reason | ||
| 0 | (30, 33] | Competitor made better offer | 69.158879 |
| Don't know | 7.943925 | ||
| Competitor had better devices | 6.542056 | ||
| Attitude of support person | 4.672897 | ||
| Product dissatisfaction | 2.336449 | ||
| 1 | (33, 35] | Attitude of support person | 16.400581 |
| Competitor had better devices | 16.400581 | ||
| Competitor made better offer | 12.191582 | ||
| Don't know | 7.256894 | ||
| Attitude of service provider | 5.079826 | ||
| 2 | (35, 37] | Competitor had better devices | 20.467836 |
| Attitude of support person | 16.374269 | ||
| Attitude of service provider | 14.619883 | ||
| Competitor made better offer | 7.017544 | ||
| Don't know | 6.432749 | ||
| 3 | (37, 42] | Competitor had better devices | 18.993711 |
| Competitor offered more data | 9.182390 | ||
| Attitude of support person | 8.679245 | ||
| Competitor made better offer | 8.427673 | ||
| Competitor offered higher download speeds | 7.169811 |
df1.head()
| Senior Citizen | Partner | Dependents | Tenure | Phone Service | Multiple Lines | Online Security | Online Backup | Device Protection | Tech Support | Streaming TV | Streaming Movies | Paperless Billing | Payment Method | Monthly Charges | Total Charges | Gender | Age | Under 30 | Number of Dependents | City | Zip Code | Latitude | Longitude | Population | Total Revenue | Satisfaction Score | Churn Value | Churn Score | Churn Category | Churn Reason | churn_rate | Internet Service_0 | Internet Service_DSL | Internet Service_Fiber optic | Contract_Month-to-month | Contract_One year | Contract_Two year | geo_bin | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | Electronic check | 29.85 | 29.85 | Female | 36 | 0 | 0 | Los Angeles | 90001 | 33.973616 | -118.249020 | 54492 | 29.85 | 3 | 0 | 27 | 0 | 0 | 0.25 | 0 | 1 | 0 | 1 | 0 | 0 | (33, 35] |
| 1 | 0 | 0 | 0 | 34 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | Mailed check | 56.95 | 1889.50 | Male | 46 | 0 | 0 | Los Angeles | 90002 | 33.949255 | -118.246978 | 44586 | 2470.56 | 3 | 0 | 61 | 0 | 0 | 0.00 | 0 | 1 | 0 | 0 | 1 | 0 | (33, 35] |
| 2 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | Mailed check | 53.85 | 108.15 | Male | 37 | 0 | 0 | Los Angeles | 90003 | 33.964131 | -118.272783 | 58198 | 129.09 | 1 | 1 | 86 | Competitor | Competitor made better offer | 0.20 | 0 | 1 | 0 | 1 | 0 | 0 | (33, 35] |
| 3 | 0 | 0 | 0 | 45 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | Bank transfer (automatic) | 42.30 | 1840.75 | Male | 53 | 0 | 0 | Los Angeles | 90004 | 34.076259 | -118.310715 | 67852 | 1840.75 | 3 | 0 | 66 | 0 | 0 | 0.40 | 0 | 1 | 0 | 0 | 1 | 0 | (33, 35] |
| 4 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | Electronic check | 70.70 | 151.65 | Female | 19 | 1 | 2 | Los Angeles | 90005 | 34.059281 | -118.307420 | 43019 | 169.89 | 2 | 1 | 67 | Other | Moved | 0.50 | 0 | 0 | 1 | 1 | 0 | 0 | (33, 35] |
df5 = df1.copy()
#Renaming columns
df5.rename(columns=\
{'Senior Citizen':'Senior_Citizen', 'Phone Service':'Phone_Service','Multiple Lines':'Multiple_Lines',\
'Online Security':'Online_Security','Online Backup':'Online_Backup','Device Protection':'Device_Protection',\
'Tech Support':'Tech_Support','Streaming TV':'Streaming_TV','Streaming Movies':'Streaming_Movies',\
'Paperless Billing':'Paperless_Billing','Payment Method':'Payment_Method','Monthly Charges':'Monthly_Charges',\
'Total Charges':'Total_Charges','Satisfaction Score':'Satisfaction_Score','Churn Value':'Churn_Value',\
'Churn Score':'Churn_Score','Churn Category':'Churn_Category','Churn Reason':'Churn_Reason'},inplace=True)
senior_citizen_churn = df5.groupby(['Senior_Citizen','Churn_Value']).agg({'Churn_Value':'count'})
Senior_Churn_Percents = senior_citizen_churn.groupby(level=0).apply(lambda x:
(x*100) / x.sum()).rename(columns={'Churn_Value':'Churn %'})
df5.groupby(['Senior_Citizen','Churn_Value']).agg({'Churn_Value':'count'})\
.rename(columns={'Churn_Value':'Churn Count'})
Senior_Churn_Percents
| Churn Count | ||
|---|---|---|
| Senior_Citizen | Churn_Value | |
| 0 | 0 | 4508 |
| 1 | 1393 | |
| 1 | 0 | 666 |
| 1 | 476 |
| Churn % | ||
|---|---|---|
| Senior_Citizen | Churn_Value | |
| 0 | 0 | 76.393832 |
| 1 | 23.606168 | |
| 1 | 0 | 58.318739 |
| 1 | 41.681261 |
sns.countplot(x='Churn_Value', hue='Senior_Citizen', data=df5)
<AxesSubplot:xlabel='Churn_Value', ylabel='count'>
tech_support_senior = df5[df5.Senior_Citizen == 1].groupby(['Senior_Citizen','Churn_Value','Tech_Support'])\
.agg({'Churn_Value':'count'})
tech_support_senior_percents = tech_support_senior.groupby(level=0).apply(lambda x:
(x*100) / x.sum()).rename(columns={'Churn_Value':'Churn %'})
df5[df5.Senior_Citizen == 1].groupby(['Senior_Citizen','Churn_Value','Tech_Support'])\
.agg({'Churn_Value':'count'})
tech_support_senior_percents
| Churn_Value | |||
|---|---|---|---|
| Senior_Citizen | Churn_Value | Tech_Support | |
| 1 | 0 | 0 | 457 |
| 1 | 209 | ||
| 1 | 0 | 425 | |
| 1 | 51 |
| Churn % | |||
|---|---|---|---|
| Senior_Citizen | Churn_Value | Tech_Support | |
| 1 | 0 | 0 | 40.017513 |
| 1 | 18.301226 | ||
| 1 | 0 | 37.215412 | |
| 1 | 4.465849 |
sns.countplot(x='Churn_Value', hue='Tech_Support', data=df5[df5.Senior_Citizen==1])
<AxesSubplot:xlabel='Churn_Value', ylabel='count'>
tech_support_nonsenior = df5[df5.Senior_Citizen == 0].groupby(['Senior_Citizen','Churn_Value','Tech_Support'])\
.agg({'Churn_Value':'count'})
tech_support_nonsenior_percents = tech_support_nonsenior.groupby(level=0).apply(lambda x:
(x*100) / x.sum()).rename(columns={'Churn_Value':'Churn %'})
df5[df5.Senior_Citizen == 0].groupby(['Senior_Citizen','Churn_Value','Tech_Support'])\
.agg({'Churn_Value':'count'})
tech_support_nonsenior_percents
| Churn_Value | |||
|---|---|---|---|
| Senior_Citizen | Churn_Value | Tech_Support | |
| 0 | 0 | 0 | 2983 |
| 1 | 1525 | ||
| 1 | 0 | 1134 | |
| 1 | 259 |
| Churn % | |||
|---|---|---|---|
| Senior_Citizen | Churn_Value | Tech_Support | |
| 0 | 0 | 0 | 50.550754 |
| 1 | 25.843077 | ||
| 1 | 0 | 19.217082 | |
| 1 | 4.389087 |
df5.Satisfaction_Score[(df5.Senior_Citizen == 1) & (df5.Tech_Support == 1)].mean()
df5.Satisfaction_Score[(df5.Senior_Citizen == 1) & (df5.Tech_Support == 0)].mean()
3.3038461538461537
2.815192743764172
df5.Satisfaction_Score[(df5.Senior_Citizen == 0) & (df5.Tech_Support == 1)].mean()
df5.Satisfaction_Score[(df5.Senior_Citizen == 0) & (df5.Tech_Support == 0)].mean()
3.344170403587444
3.290259897983969
Fiber_optic = df5[df5.Churn_Value==1].groupby(['Senior_Citizen','Internet Service_Fiber optic'])\
.agg({'Internet Service_Fiber optic':'count'})
Fiber_optic_percents = Fiber_optic.groupby(level=0).apply(lambda x:
(x*100) / x.sum()).rename(columns={'Internet Service_Fiber optic':'Internet Service %'})
Fiber_optic
Fiber_optic_percents.style.background_gradient(cmap = 'Blues')
| Internet Service_Fiber optic | ||
|---|---|---|
| Senior_Citizen | Internet Service_Fiber optic | |
| 0 | 0 | 489 |
| 1 | 904 | |
| 1 | 0 | 83 |
| 1 | 393 |
| Internet Service % | ||
|---|---|---|
| Senior_Citizen | Internet Service_Fiber optic | |
| 0 | 0 | 35.104092 |
| 1 | 64.895908 | |
| 1 | 0 | 17.436975 |
| 1 | 82.563025 |
</div>
!jupyter nbconvert --to html Telco_Churn_FinalVersion.ipynb
[NbConvertApp] WARNING | pattern 'Telco_Churn_FinalVersion.ipynb' matched no files
This application is used to convert notebook files (*.ipynb) to various other
formats.
WARNING: THE COMMANDLINE INTERFACE MAY CHANGE IN FUTURE RELEASES.
Options
=======
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
<cmd> --help-all
--debug
set log level to logging.DEBUG (maximize logging output)
Equivalent to: [--Application.log_level=10]
--generate-config
generate default config file
Equivalent to: [--JupyterApp.generate_config=True]
-y
Answer yes to any questions instead of prompting.
Equivalent to: [--JupyterApp.answer_yes=True]
--execute
Execute the notebook prior to export.
Equivalent to: [--ExecutePreprocessor.enabled=True]
--allow-errors
Continue notebook execution even if one of the cells throws an error and include the error message in the cell output (the default behaviour is to abort conversion). This flag is only relevant if '--execute' was specified, too.
Equivalent to: [--ExecutePreprocessor.allow_errors=True]
--stdin
read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'
Equivalent to: [--NbConvertApp.from_stdin=True]
--stdout
Write notebook output to stdout instead of files.
Equivalent to: [--NbConvertApp.writer_class=StdoutWriter]
--inplace
Run nbconvert in place, overwriting the existing notebook (only
relevant when converting to notebook format)
Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory=]
--clear-output
Clear output of current file and save in place,
overwriting the existing notebook.
Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory= --ClearOutputPreprocessor.enabled=True]
--no-prompt
Exclude input and output prompts from converted document.
Equivalent to: [--TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True]
--no-input
Exclude input cells and output prompts from converted document.
This mode is ideal for generating code-free reports.
Equivalent to: [--TemplateExporter.exclude_output_prompt=True --TemplateExporter.exclude_input=True]
--allow-chromium-download
Whether to allow downloading chromium if no suitable version is found on the system.
Equivalent to: [--WebPDFExporter.allow_chromium_download=True]
--log-level=<Enum>
Set the log level by value or name.
Choices: any of [0, 10, 20, 30, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL']
Default: 30
Equivalent to: [--Application.log_level]
--config=<Unicode>
Full path of a config file.
Default: ''
Equivalent to: [--JupyterApp.config_file]
--to=<Unicode>
The export format to be used, either one of the built-in formats
['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf',
'python', 'rst', 'script', 'slides', 'webpdf'] or a dotted object name that
represents the import path for an `Exporter` class
Default: ''
Equivalent to: [--NbConvertApp.export_format]
--template=<Unicode>
Name of the template to use
Default: ''
Equivalent to: [--TemplateExporter.template_name]
--template-file=<Unicode>
Name of the template file to use
Default: None
Equivalent to: [--TemplateExporter.template_file]
--writer=<DottedObjectName>
Writer class used to write the results of the conversion
Default: 'FilesWriter'
Equivalent to: [--NbConvertApp.writer_class]
--post=<DottedOrNone>
PostProcessor class used to write the results of the conversion
Default: ''
Equivalent to: [--NbConvertApp.postprocessor_class]
--output=<Unicode>
overwrite base name use for output files. can only be used when converting
one notebook at a time.
Default: ''
Equivalent to: [--NbConvertApp.output_base]
--output-dir=<Unicode>
Directory to write output(s) to. Defaults to output to the directory of each
notebook. To recover previous default behaviour (outputting to the current
working directory) use . as the flag value.
Default: ''
Equivalent to: [--FilesWriter.build_directory]
--reveal-prefix=<Unicode>
The URL prefix for reveal.js (version 3.x). This defaults to the reveal CDN,
but can be any url pointing to a copy of reveal.js.
For speaker notes to work, this must be a relative path to a local copy of
reveal.js: e.g., "reveal.js".
If a relative path is given, it must be a subdirectory of the current
directory (from which the server is run).
See the usage documentation
(https://nbconvert.readthedocs.io/en/latest/usage.html#reveal-js-html-
slideshow) for more details.
Default: ''
Equivalent to: [--SlidesExporter.reveal_url_prefix]
--nbformat=<Enum>
The nbformat version to write. Use this to downgrade notebooks.
Choices: any of [1, 2, 3, 4]
Default: 4
Equivalent to: [--NotebookExporter.nbformat_version]
Examples
--------
The simplest way to use nbconvert is
> jupyter nbconvert mynotebook.ipynb --to html
Options include ['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf', 'python', 'rst', 'script', 'slides', 'webpdf'].
> jupyter nbconvert --to latex mynotebook.ipynb
Both HTML and LaTeX support multiple output templates. LaTeX includes
'base', 'article' and 'report'. HTML includes 'basic' and 'full'. You
can specify the flavor of the format used.
> jupyter nbconvert --to html --template lab mynotebook.ipynb
You can also pipe the output to stdout, rather than a file
> jupyter nbconvert mynotebook.ipynb --stdout
PDF is generated via latex
> jupyter nbconvert mynotebook.ipynb --to pdf
You can get (and serve) a Reveal.js-powered slideshow
> jupyter nbconvert myslides.ipynb --to slides --post serve
Multiple notebooks can be given at the command line in a couple of
different ways:
> jupyter nbconvert notebook*.ipynb
> jupyter nbconvert notebook1.ipynb notebook2.ipynb
or you can specify the notebooks list in a config file, containing::
c.NbConvertApp.notebooks = ["my_notebook.ipynb"]
> jupyter nbconvert --config mycfg.py
To see all available configurables, use `--help-all`.